package com.cmge.ad.spider.pic.neihantu;

import java.util.ArrayList;
import java.util.List;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import com.cmge.ad.model.Picture;
import com.cmge.ad.spider.pipeline.MysqlAlbumPicturePipeline;

/**
 * WebMagic page processor for the "neihan" picture listing of nixiba.com,
 * e.g. http://www.nixiba.com/neihan/2.htm
 *
 * <p>For every listing page it collects one {@link Picture} per
 * {@code div.itembox} element and publishes the list under the result field
 * {@code "picList"}. While processing the first page it sees, it also enqueues
 * listing pages 2..{@code max}.
 *
 * <p>NOTE(review): {@code flag} is a plain, unsynchronized field; {@code main}
 * runs the spider with a single thread. Confirm before raising the thread count.
 *
 * @author	ljt
 * @time	2014-12-29 11:16:05 AM
 */
public class NiXiBaPageProcessor implements PageProcessor {

    private Site site = Site.me().setRetryTimes(3).setSleepTime(100);

    public static final String URL_LIST = "/neihan/\\w+";

    /** Start marker of the image URL inside the share widget's {@code data} attribute. */
    private static final String IMAGE_URL_PREFIX = "http://tu.nixiba.com/";

    /** Marker that ends the image URL inside the {@code data} attribute. */
    private static final String IMAGE_URL_TERMINATOR = "'}";

    // Highest listing page number to enqueue.
    private int max = 1384;

    // True until the follow-up listing pages have been enqueued once.
    private boolean flag = true;

    /**
     * Extracts the pictures of one listing page and, on the first call only,
     * enqueues the remaining listing pages.
     *
     * <p>Items whose image URL cannot be extracted are skipped, so a single
     * malformed item no longer aborts the whole page (and, on the first page,
     * no longer prevents the follow-up requests from being added).
     *
     * @param page the downloaded page handed in by WebMagic
     */
    @Override
    public void process(Page page) {
        // Every item on the page lives in its own div.itembox element.
        List<Selectable> items = page.getHtml().xpath("//div[@class='itembox']").nodes();
        List<Picture> picList = new ArrayList<Picture>();
        if (null != items) {
            for (Selectable item : items) {
                String desc = item.xpath("//div[@class='topic']//a[@target='_blank']/text()").get();
                String url = extractImageUrl(item.xpath("//div[@id='bdshare']/@data").get());
                if (null == url) {
                    // No usable image URL for this item: skip it instead of failing the page.
                    continue;
                }
                Picture pic = new Picture();
                pic.setDesc(desc);
                // Only one image URL is available, so it serves as both thumbnail and full size.
                pic.setMinImageUrl(url);
                pic.setMaxImageUrl(url);
                pic.setSource("nixiba_neihantu");
                picList.add(pic);
            }
        }

        // Publish results only when something was parsed; pages without items
        // already produced no fields before this change.
        if (!picList.isEmpty()) {
            page.putField("picList", picList);
            // NOTE(review): the meaning of type 3 is defined by the consuming pipeline - confirm there.
            page.putField("type", 3);
        }

        if (flag) {
            // Page 1 is the seed URL, so enqueueing starts at page 2.
            for (int i = 2; i <= max; i++) {
                page.addTargetRequest("http://www.nixiba.com/neihan/" + i + ".htm");
            }
            flag = false;
        }
    }

    /**
     * Cuts the image URL out of the share widget's {@code data} attribute value.
     *
     * <p>The URL is taken from the first occurrence of
     * {@link #IMAGE_URL_PREFIX} up to (excluding) the next
     * {@link #IMAGE_URL_TERMINATOR} that follows it.
     *
     * @param data raw attribute value; may be {@code null}
     * @return the image URL, or {@code null} if {@code data} is {@code null}
     *         or either marker is missing
     */
    private static String extractImageUrl(String data) {
        if (null == data) {
            return null;
        }
        int start = data.indexOf(IMAGE_URL_PREFIX);
        if (start < 0) {
            return null;
        }
        // Search for the terminator after the URL start so an earlier "'}" cannot yield end < start.
        int end = data.indexOf(IMAGE_URL_TERMINATOR, start);
        if (end < 0) {
            return null;
        }
        return data.substring(start, end);
    }

    /**
     * @return the crawl settings used by this processor (3 retries, sleep time 100)
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Starts a single-threaded crawl from listing page 1 and hands the results
     * to {@link MysqlAlbumPicturePipeline}.
     */
    public static void main(String[] args) throws Exception {
        Spider qsSpider = Spider.create(new NiXiBaPageProcessor())
                        .addUrl("http://www.nixiba.com/neihan/1.htm")
//                        .addPipeline(new RedisPipeline())
//                        .addPipeline(new JsonFilePipeline())
                        .addPipeline(new MysqlAlbumPicturePipeline())
                        .thread(1);
        qsSpider.start();
    }
}
